import urllib.request as request
from bs4 import BeautifulSoup
import pymysql
# Accumulator shared by parse_html()/save_html(); each entry is a dict
# of the form {'url': <movie page href>, 'name': <movie title>}.
_list = []
# First page of the Douban Top-250 chart; also the base that the
# relative pagination href from the "next" link is appended to.
url = 'https://movie.douban.com/top250'


def get_html(_url):
	"""Fetch *_url* and return the response body decoded as text.

	Fix over the original: the response returned by ``urlopen`` was
	never closed, leaking the underlying socket. Using the response
	as a context manager guarantees it is closed.

	:param _url: absolute URL to fetch.
	:return: response body decoded with the default (UTF-8) codec.
	"""
	with request.urlopen(url=_url) as res:
		return res.read().decode()


def parse_html(_html):
	"""Parse one Top-250 listing page and recurse through pagination.

	For every ``<li>`` inside the page's ``<ol>``, appends
	``{'url': ..., 'name': ...}`` to the module-level ``_list``; then,
	if the pager has a "next" link, fetches and parses the next page.

	Fix over the original: ``soup.find('span', ...)`` returns ``None``
	when the pager span is missing entirely, so calling ``.find('a')``
	on it raised AttributeError before the ``if`` guard could run.
	Both the span and its link are now checked.

	:param _html: HTML text of a listing page.
	"""
	soup = BeautifulSoup(_html, 'html.parser')
	movie_zone = soup.find('ol')
	for movie in movie_zone.find_all('li'):
		_list.append({
			'url': movie.find('a').get('href'),
			# First span.title is the primary (Chinese) title.
			'name': movie.find('span', attrs={'class': 'title'}).getText(),
		})
	# Follow pagination only when a "next" anchor actually exists
	# (on the last page the span has no <a>, or may be absent).
	next_span = soup.find('span', attrs={'class': 'next'})
	next_link = next_span.find('a') if next_span else None
	if next_link:
		# The href is relative (e.g. '?start=25...'), so prepend the base URL.
		parse_html(get_html(url + next_link['href']))


def save_html():
	"""Insert every scraped movie from ``_list`` into the ``movie`` table.

	Rows are ``(index, url, name)`` with the index taken from the
	position in ``_list``.

	Fixes over the original: the connection and cursor leaked if any
	insert raised (no try/finally); cleanup is now guaranteed by the
	cursor context manager and a ``finally`` block. A single
	``executemany`` replaces one round-trip per row.
	"""
	# NOTE(review): credentials are hard-coded — move to config/env vars.
	conn = pymysql.connect(host='localhost', user='root', password='root', db='spiders')
	try:
		with conn.cursor() as cursor:
			sql = 'insert into movie values(%s,%s,%s)'
			rows = [(_id, movie['url'], movie['name'])
					for _id, movie in enumerate(_list)]
			cursor.executemany(sql, rows)
		conn.commit()
	finally:
		conn.close()


if __name__ == "__main__":
	# Crawl starting from the first Top-250 page (parse_html follows
	# the pagination itself), then persist everything collected.
	parse_html(get_html(url))
	save_html()
